library(readstata13) # package to read Stata 13 dta files
library(tidyverse)
library(lubridate) # Working with dates
library(rstan)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = T)
set.seed(1)


Municipality_data <- read.csv("Data/Municipality_data_2021_01_18.csv") %>%
  as_tibble()
Population_data_extrapolated <- readRDS("Results/Population_data_extrapolated.rds")

mortality_period_municipality_level <- function(start_date, end_date) {
  Municipality_data %>%
    filter(as.numeric(JourDécès) != 0) %>% # excluding 15238 death records
    filter(AnnéeDécès != "") %>% # Filtering what is actually population data
    mutate(Death_date = lubridate::ymd(paste(AnnéeDécès, MoisDécès, JourDécès, sep = "/"))) %>%
    filter(Death_date >= start_date) %>%
    filter(Death_date <= end_date) %>%
    mutate(Municipality_code = Code) %>%
    group_by(Municipality_code) %>%
    summarise(NbDeath = n())
}

selected_years = c(2015, 2016, 2017, 2019, 2020)

Mortality_data <- selected_years %>%
  lapply(function(y) {
    mortality_period_municipality_level(start_date = ymd(paste(y, "/03/15", sep = "")), end_date = ymd(paste(y, "/07/26", sep = ""))) %>%
      mutate(year = y)
  }) %>%
  bind_rows()

Mortality_data_joined <- Population_data_extrapolated %>%
  select(Code, Population, year) %>%
  filter(year %in% selected_years) %>%
  rename(Municipality_code = Code) %>%
  left_join(Mortality_data) %>%
  mutate(NbDeath = ifelse(test = is.na(NbDeath), yes = 0, no = NbDeath))


create_municipality_code_converter = function(Municipality_idx, Municipality_code){
  dict = Municipality_code %>% 
    unique %>% 
    setNames(Municipality_idx %>% unique()) 
  conversion_function = function(x){
    sapply(x, function(xi) dict[xi])
  }
  return(conversion_function)
}

set.seed(0)

selected_cities = Mortality_data_joined %>% 
  filter(year == 2020) %>% 
  filter(Population >= 710) %>% # Filtering which should match the electoral data filtering
  .$Municipality_code

full_data = Mortality_data_joined %>%
  filter(Population>0) %>% 
  filter(Municipality_code %in% selected_cities) %>%
  mutate(covid = ifelse(test = year == 2020, yes = 1, no = 0)) %>%
  mutate(Municipality_idx = as.numeric(as.factor(Municipality_code))) %>% 
  mutate(Municipality_label = as.character(Municipality_idx))

stan_data <- full_data  %>%
  (function(df) {
    list(NbDeath = df$NbDeath, 
         N = length(df$NbDeath), 
         covid = df$covid, 
         Population = df$Population, 
         N_municipalities = length(df$Municipality_code %>% unique()), 
         Municipality_code = df$Municipality_idx %>% as.numeric(), 
         municipality_label = df$Municipality_idx %>% as.character(),
         Municipality_code_converter = create_municipality_code_converter(df$Municipality_idx, df$Municipality_code))
  })

saveRDS(stan_data, file = "Results/stan_data_for_fit_2021_01_18_4weeks_second_round.rds")
